library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)
library(JRF)
library(DiagrammeRsvg)
library(rsvg)
library(RColorBrewer)
library(rbenchmark)
use_python("/usr/bin/python3", required = TRUE)
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")
source("generate_adjacency.R")
source("symmetrize.R")
source("pscores.R")
source("plotg.R")
source("compare_consensus.R")
source("create_consensus.R")
source("earlyj.R")
source("plotROC.R")
source("cutoff_adjacency.R")
source("infer_networks.R")
# Workflow flowchart ----
# Describe the analysis pipeline as a Graphviz DOT graph, export it to a PNG
# (via SVG) under ./../analysis/, and print the widget.
# The DOT lines are deliberately left unindented: Graphviz only discards a
# '#' line when the '#' is the first character of the line.
# Label typos fixed: "Thruth" -> "Truth", "compare_consesus.R" ->
# "compare_consensus.R" (the file actually sourced by this script).
grViz_output <- DiagrammeR::grViz("
digraph biological_workflow {
# Set up the graph attributes
graph [layout = dot, rankdir = TB]
# Define consistent node styles
node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]
# Define nodes for each step
StartNode [label = 'Ground Truth - String Regulatory Network', shape = oval, color = seagreen, fontcolor = black]
AdjacencyMatrix [label = 'Truth Adjacency Matrix', shape = rectangle, color = seagreen]
SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = goldenrod]
# Reconstruction using Three Packages
LateIntegration [label = 'Late\nIntegration', shape = oval, color = khaki]
EarlyIntegration [label = 'Early\nIntegration', shape = oval, color = khaki]
Jointanalysis [label = 'Joint\nanalysis', shape = oval, color = khaki]
# Process
earlyj [label = 'earlyj.R', shape=diamond, color=lightblue, fontcolor=black]
networkinference [label = 'infer_networks.R\nGENIE3\nGRNBoost2\nJRF', shape = rectangle, color = goldenrod, fontcolor=black]
symmetrize [label = 'symmetrize.R', shape = rectangle, color = goldenrod, fontcolor=black]
plotROC [label = 'plotROC.R', shape=diamond, color=lightblue, fontcolor=black]
generateadjacency [label='generate_adjacency.R\nWeighted Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
cutoffadjacency [label='cutoff_adjacency.R\nBinary Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
pscores [label='pscores.R\nTPR\nFPR\nF1\nAccuracy\nPrecision', shape=diamond, color=lightblue, fontcolor=black]
voting [label='Edges voting', shape=diamond, color=lightblue, fontcolor=black]
plotgcompare [label='plotg.R\ncompare_consensus.R\nPlot Graphs', shape=rectangle, color=goldenrod, fontcolor=black]
# Define the workflow structure
StartNode -> AdjacencyMatrix
AdjacencyMatrix -> SimulateData
SimulateData -> LateIntegration
SimulateData -> EarlyIntegration
SimulateData -> Jointanalysis
EarlyIntegration -> earlyj
earlyj -> networkinference
LateIntegration -> networkinference
Jointanalysis -> networkinference
networkinference -> symmetrize
symmetrize -> plotROC
symmetrize -> generateadjacency
generateadjacency -> cutoffadjacency
cutoffadjacency -> pscores
cutoffadjacency -> voting
voting -> plotgcompare
voting -> pscores
}
")

# Convert the widget to SVG text, then rasterize that SVG to a PNG file.
svg_code <- DiagrammeRsvg::export_svg(grViz_output)
rsvg::rsvg_png(charToRaw(svg_code), "./../analysis/flowchart.png")

# Display the diagram itself.
grViz_output
# Ground truth network ----
# Read the ground-truth adjacency matrix from CSV (first column holds the row
# names), coerce it to a matrix, and zero the diagonal.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
adjm <- read.table(
  "./../data/adjacency_matrix.csv",
  header = TRUE,
  row.names = 1,
  sep = ","
) %>%
  as.matrix()
diag(adjm) <- 0

# Interactive table of the ground truth with CSV/Excel export buttons.
datatable(
  adjm,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "Ground Truth"
)

# Build an undirected igraph object from the matrix (diagonal ignored) and
# record its size for the plot title.
gtruth <- igraph::graph_from_adjacency_matrix(
  adjm,
  mode = "undirected",
  diag = FALSE
)
num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

# Seed set immediately before plotting with the Fruchterman-Reingold layout.
set.seed(1234)
plot(
  gtruth,
  main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
  vertex.label.color = "black",
  vertex.size = 6,
  edge.width = 2,
  vertex.label = NA,
  vertex.color = "steelblue",
  layout = igraph::layout_with_fr
)
# Simulate single-cell count data ----
# One simulated count matrix per entry of `mu_values`, each with `ncell` rows
# and one column per node of the ground-truth matrix.
ncell <- 500
nodes <- nrow(adjm)
set.seed(1130)
mu_values <- c(3, 5, 7)

# Iterate over the indices of `mu_values` rather than a hard-coded 1:3 so the
# number of simulated matrices always follows the vector's length.
count_matrices <- lapply(seq_along(mu_values), function(i) {
  # Per-matrix seed: each simulation is reproducible on its own.
  set.seed(1130 + i)
  count_matrix_i <- simdata(
    n = ncell, p = nodes, B = adjm, family = "ZINB",
    mu = mu_values[i], mu_noise = 1, theta = 0.5, pi = 0.2
  )
  count_matrix_df <- as.data.frame(count_matrix_i)
  # Columns take the ground truth's column names; rows are cell1, cell2, ...
  colnames(count_matrix_df) <- colnames(adjm)
  rownames(count_matrix_df) <- paste0("cell", seq_len(nrow(count_matrix_df)))
  count_matrix_df
})

# Preview the first simulated matrix.
datatable(
  count_matrices[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "Simulated count matrix"
)

# Persist the full list of simulated matrices.
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")
# GENIE3 - late integration ----
# Run network inference on the list of simulated count matrices with a fixed
# seed, and persist the raw output.
set.seed(1234)
genie3_late <- infer_networks(count_matrices, method = "GENIE3")
saveRDS(genie3_late, "./../analysis/genie3_late.RDS")

datatable(
  genie3_late[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 output"
)

# Weighted adjacency matrices, symmetrized with the "mean" weight function,
# then ROC curves against the ground truth.
genie3_late_wadj <- generate_adjacency(genie3_late, ground.truth = adjm)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")
plotROC(
  sgenie3_late_wadj, adjm,
  plot_title = "ROC curve - GENIE3 Late Integration"
)

datatable(
  sgenie3_late_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 symmetrize output"
)

# cutoff_adjacency.R is already sourced with the other helper files at the top
# of the script, so it is not sourced a second time here (the other method
# sections do not re-source it either).
sgenie3_late_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sgenie3_late_wadj,
  ground.truth = adjm,
  n = 2,
  method = "GENIE3"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.009957001
## Matrix 2 Mean 95th Percentile Cutoff: 0.009833172
## Matrix 3 Mean 95th Percentile Cutoff: 0.009835106

datatable(
  sgenie3_late_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 adjacency"
)

# Score each thresholded matrix against the ground truth and show the
# Statistics element of the result.
scores <- pscores(adjm, sgenie3_late_adj)
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

plots <- plotg(sgenie3_late_adj)

# Consensus networks across the three matrices: "vote" and "union" methods.
consesusm <- create_consensus(sgenie3_late_adj, method = "vote")
consesusu <- create_consensus(sgenie3_late_adj, method = "union")

# Two-row plotting layout while both consensus networks are scored, then back
# to a single panel. NOTE(review): presumably pscores() draws a plot per call;
# confirm.
par(mfrow = c(2, 1))
scores <- pscores(adjm, list(consesusm))
scoresu <- pscores(adjm, list(consesusu))
par(mfrow = c(1, 1))

# Statistics for the "vote" consensus.
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Statistics for the "union" consensus.
datatable(
  scoresu$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Compare each consensus with the ground truth. The second assignment
# overwrites the first, so only the "union" result remains in `ajm_compared`.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
# GRNBoost2 - late integration ----
# Seeded inference on the list of simulated count matrices; raw output saved.
set.seed(1234)
grnb_late <- infer_networks(count_matrices, method = "GRNBoost2")
saveRDS(grnb_late, "./../analysis/grnb_late.RDS")

datatable(
  grnb_late[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 output"
)

# Weighted adjacency -> symmetrized ("mean") -> ROC against the ground truth.
grnb_late_wadj <- generate_adjacency(grnb_late, ground.truth = adjm)
sgrnb_late_wadj <- symmetrize(grnb_late_wadj, weight_function = "mean")
plotROC(
  sgrnb_late_wadj, adjm,
  plot_title = "ROC curve - GRNBoost2 Late Integration"
)

datatable(
  sgrnb_late_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 symmetrize output"
)

# Threshold the symmetrized weights; recorded console output is kept below.
sgrnb_late_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sgrnb_late_wadj,
  ground.truth = adjm,
  n = 2,
  method = "GRNBoost2"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.8251794
## Matrix 2 Mean 95th Percentile Cutoff: 0.8524455
## Matrix 3 Mean 95th Percentile Cutoff: 0.84979

datatable(
  sgrnb_late_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 adjacency"
)

# Per-matrix scores against the ground truth.
scores <- pscores(adjm, sgrnb_late_adj)
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

plots <- plotg(sgrnb_late_adj)

# "vote" and "union" consensus networks.
consesusm <- create_consensus(sgrnb_late_adj, method = "vote")
consesusu <- create_consensus(sgrnb_late_adj, method = "union")

# Two-row plotting layout while scoring both consensus networks, then reset.
par(mfrow = c(2, 1))
scores <- pscores(adjm, list(consesusm))
scoresu <- pscores(adjm, list(consesusu))
par(mfrow = c(1, 1))

# Statistics for the "vote" consensus.
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Statistics for the "union" consensus.
datatable(
  scoresu$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Compare each consensus with the ground truth; `ajm_compared` ends up holding
# the "union" comparison because the second call overwrites the first.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
# GENIE3 - early integration ----
# earlyj() combines the simulated matrices into a single object; it is wrapped
# in a one-element list so it can go through the same list-based helpers.
early_matrix <- list(earlyj(count_matrices))

set.seed(1234)
genie3_early <- infer_networks(early_matrix, method = "GENIE3")
saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

datatable(
  genie3_early[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 output"
)

# Weighted adjacency -> symmetrized ("mean") -> ROC against the ground truth.
genie3_early_wadj <- generate_adjacency(genie3_early, ground.truth = adjm)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")
plotROC(
  sgenie3_early_wadj, adjm,
  plot_title = "ROC curve - GENIE3 Early Integration"
)

datatable(
  sgenie3_early_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 symmetrize output"
)

# Threshold the symmetrized weights; recorded console output is kept below.
sgenie3_early_adj <- cutoff_adjacency(
  count_matrices = early_matrix,
  weighted_adjm_list = sgenie3_early_wadj,
  ground.truth = adjm,
  n = 2,
  method = "GENIE3"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.009730871

datatable(
  sgenie3_early_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GENIE3 adjacency"
)

# Score the single early-integration network against the ground truth.
scores <- pscores(adjm, sgenie3_early_adj)
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

plots <- plotg(sgenie3_early_adj)

# Only one network exists here, so it is compared with the ground truth
# directly instead of building a consensus.
ajm_compared <- compare_consensus(sgenie3_early_adj[[1]], adjm)
# GRNBoost2 - early integration ----
# Seeded inference on the one-element list holding the early-integrated
# matrix; raw output saved.
set.seed(1234)
grnb_early <- infer_networks(early_matrix, method = "GRNBoost2")
saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

datatable(
  grnb_early[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 output"
)

# Weighted adjacency -> symmetrized ("mean") -> ROC against the ground truth.
grnb_early_wadj <- generate_adjacency(grnb_early, ground.truth = adjm)
sgrnb_early_wadj <- symmetrize(grnb_early_wadj, weight_function = "mean")
plotROC(
  sgrnb_early_wadj, adjm,
  plot_title = "ROC curve - GRNBoost2 Early Integration"
)

# Show the symmetrized matrix. The table previously displayed the
# un-symmetrized `grnb_early_wadj` under this "symmetrize output" caption,
# unlike the matching tables in the other method sections.
datatable(
  sgrnb_early_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 symmetrize output"
)

# Threshold the symmetrized weights; recorded console output is kept below.
sgrnb_early_adj <- cutoff_adjacency(
  count_matrices = early_matrix,
  weighted_adjm_list = sgrnb_early_wadj,
  ground.truth = adjm,
  n = 2,
  method = "GRNBoost2"
)
## Matrix 1 Mean 95th Percentile Cutoff: 3.385248

datatable(
  sgrnb_early_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "GRNBoost2 adjacency"
)

# Score the single early-integration network against the ground truth.
scores <- pscores(adjm, sgrnb_early_adj)
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

plots <- plotg(sgrnb_early_adj)

# Only one network exists here, so it is compared with the ground truth
# directly instead of building a consensus.
ajm_compared <- compare_consensus(sgrnb_early_adj[[1]], adjm)
#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
#jrf_mat <- infer_networks(count_matrices, method="JRF")
#jrf_matrices <- lapply(count_matrices, t)
#jrf_matrices_norm <- lapply(jrf_matrices,function(x) {
# (x - mean(x)) / sd(x)
# })
#genes <- rownames(jrf_matrices_norm[[1]])
#netout <- JRF(X = jrf_matrices_norm,
# genes.name = genes,
# ntree = 500,
# mtry = round(sqrt(length(genes) - 1)))
#netout %>%
# datatable(extensions = 'Buttons',
# options = list(
# dom = 'Bfrtip',
# buttons = c('csv', 'excel'),
# scrollX = TRUE,
# pageLength = 10),
# caption = "JRF output")
#out.perm <- Run_permutation(jrf_matrices_norm,mtry=round(sqrt(length(genes)-1)),ntree=500, genes,3)
#out <- JRF_permutation(jrf_matrices_norm,mtry=round(sqrt(length(genes)-1)),ntree=500,genes,2)
#final.net <- JRF_network(netout,out.perm,0.001)
#final.net
#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
# JRF - joint analysis ----
# Run inference on all simulated matrices at once.
# NOTE(review): unlike the GENIE3/GRNBoost2 sections, no set.seed() call
# precedes this inference, so its result depends on the RNG state left by the
# preceding code. Confirm whether a seed should be set here (doing so would
# change the recorded cutoffs further down).
jrf_mat <- infer_networks(count_matrices, method = "JRF")

datatable(
  jrf_mat[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF output"
)

# Split the first element of the JRF output into one data frame per column
# whose name contains "importance", each keeping gene1, gene2 and that single
# importance column under its original name. lapply() replaces the former
# index loop, which grew the list element by element, renamed the third column
# to the name it already had, and left `df` / `i` behind as globals.
importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)
jrf_list <- lapply(importance_columns, function(importance_col) {
  jrf_mat[[1]][, c("gene1", "gene2", importance_col)]
})
saveRDS(jrf_list, "./../analysis/jrf.RDS")

datatable(
  jrf_list[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF output"
)
# JRF - adjacency, scoring and consensus ----
# Weighted adjacency -> symmetrized ("mean") -> ROC against the ground truth.
jrf_wadj <- generate_adjacency(jrf_list, ground.truth = adjm)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
plotROC(sjrf_wadj, adjm, plot_title = "ROC curve - JRF Late Integration")

datatable(
  sjrf_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF symmetrize output"
)

# Threshold the symmetrized weights; recorded console output is kept below.
sjrf_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sjrf_wadj,
  ground.truth = adjm,
  n = 2,
  method = "JRF"
)
## Matrix 1 Mean 95th Percentile Cutoff: 4.962231
## Matrix 2 Mean 95th Percentile Cutoff: 4.900464
## Matrix 3 Mean 95th Percentile Cutoff: 4.869401

datatable(
  sjrf_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF adjacency"
)

# Per-matrix scores against the ground truth.
scores <- pscores(adjm, sjrf_adj)
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

plots <- plotg(sjrf_adj)

# "vote" and "union" consensus networks, each scored against the ground truth.
consesusm <- create_consensus(sjrf_adj, method = "vote")
consesusu <- create_consensus(sjrf_adj, method = "union")
scores <- pscores(adjm, list(consesusm))
scoresu <- pscores(adjm, list(consesusu))

# Statistics for the "vote" consensus.
datatable(
  scores$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Statistics for the "union" consensus.
datatable(
  scoresu$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)

# Compare each consensus with the ground truth; `ajm_compared` ends up holding
# the "union" comparison because the second call overwrites the first.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)

# Record the R session (version, platform, attached packages).
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=it_IT.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=it_IT.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=it_IT.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doRNG_1.8.2 rngtools_1.5.2 RColorBrewer_1.1-3 rsvg_2.6.1
## [5] DiagrammeRsvg_0.1 JRF_0.1-4 pROC_1.18.0 DiagrammeR_1.0.11
## [9] gridExtra_2.3 reshape2_1.4.4 rbenchmark_1.0.0 learn2count_0.3.2
## [13] reticulate_1.34.0 DT_0.22 forcats_0.5.1 stringr_1.4.0
## [17] dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [21] tibble_3.1.7 ggplot2_3.3.6 tidyverse_1.3.1 igraph_2.0.3
## [25] doParallel_1.0.17 iterators_1.0.14 foreach_1.5.2 GENIE3_1.16.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2
## [3] XVector_0.34.0 GenomicRanges_1.46.1
## [5] fs_1.5.2 rstudioapi_0.13
## [7] farver_2.1.0 fansi_1.0.3
## [9] lubridate_1.8.0 xml2_1.3.3
## [11] codetools_0.2-18 knitr_1.39
## [13] jsonlite_1.8.0 broom_0.8.0
## [15] dbplyr_2.1.1 png_0.1-7
## [17] graph_1.72.0 compiler_4.1.0
## [19] httr_1.4.3 backports_1.4.1
## [21] assertthat_0.2.1 Matrix_1.6-1.1
## [23] fastmap_1.1.0 cli_3.3.0
## [25] distributions3_0.2.2 visNetwork_2.1.2
## [27] htmltools_0.5.2 tools_4.1.0
## [29] gtable_0.3.0 glue_1.6.2
## [31] GenomeInfoDbData_1.2.7 V8_6.0.0
## [33] Rcpp_1.0.8.3 Biobase_2.54.0
## [35] cellranger_1.1.0 jquerylib_0.1.4
## [37] vctrs_0.4.1 crosstalk_1.2.0
## [39] xfun_0.30 rvest_1.0.2
## [41] lifecycle_1.0.1 MASS_7.3-57
## [43] zlibbioc_1.40.0 scales_1.2.0
## [45] hms_1.1.1 MatrixGenerics_1.6.0
## [47] SummarizedExperiment_1.24.0 SingleCellExperiment_1.16.0
## [49] yaml_2.3.5 curl_4.3.2
## [51] sass_0.4.1 stringi_1.7.6
## [53] highr_0.9 S4Vectors_0.32.4
## [55] BiocGenerics_0.40.0 GenomeInfoDb_1.30.1
## [57] rlang_1.1.4 pkgconfig_2.0.3
## [59] matrixStats_0.62.0 bitops_1.0-7
## [61] evaluate_0.15 lattice_0.20-45
## [63] labeling_0.4.2 htmlwidgets_1.5.4
## [65] tidyselect_1.1.2 plyr_1.8.7
## [67] magrittr_2.0.3 R6_2.5.1
## [69] IRanges_2.28.0 generics_0.1.2
## [71] DelayedArray_0.20.0 DBI_1.1.2
## [73] pillar_1.7.0 haven_2.5.0
## [75] withr_2.5.0 RCurl_1.98-1.6
## [77] modelr_0.1.8 crayon_1.5.1
## [79] utf8_1.2.2 iZID_0.0.1
## [81] tzdb_0.3.0 rmarkdown_2.14
## [83] grid_4.1.0 readxl_1.4.0
## [85] reprex_2.0.1 digest_0.6.29
## [87] stats4_4.1.0 munsell_0.5.0
## [89] bslib_0.3.1